
# Input locations, all relative to the directory the script is run from.

# Derived regional analysis data set (.dta file).
fb_integration_fp <- file.path(
  "..", "replication_regional_data", "derived", "regional_analysis_data.dta"
)

# European Social Survey CSV extracts (files ess8e02_2 and ess9e03_1).
ess_8_fp <- file.path(
  "..", "replication_regional_data", "raw",
  "european_social_survey", "ess8e02_2", "ess8e02_2.csv"
)
ess_9_fp <- file.path(
  "..", "replication_regional_data", "raw",
  "european_social_survey", "ess9e03_1", "ess9e03_1.csv"
)

# German region codes "DE1".."DE9" followed by "DEA".."DEG" (16 in total).
# Element i pairs with element i of `state_name`.
nuts_1_code <- paste0("DE", c(1:9, LETTERS[1:7]))

# English names of the German federal states, listed in the same order as
# `nuts_1_code` so that the two vectors pair up element by element.
state_name <- c(
  "Baden-Württemberg", "Bavaria", "Berlin", "Brandenburg",
  "Bremen", "Hamburg", "Hesse", "Mecklenburg-Vorpommern",
  "Lower Saxony", "North Rhine-Westphalia", "Rhineland-Palatinate", "Saarland",
  "Saxony", "Saxony-Anhalt", "Schleswig-Holstein", "Thuringia"
)

#' Builds the file path of the survey-responses CSV for a given NUTS level.
#'
#' The file name follows the pattern `nuts<level>_evs_responses.csv` and lives
#' in the `derived` folder of `replication_regional_data`.
#'
#' @param nuts_level the NUTS aggregation level to use; must be a whole number
#'   because it is formatted with `%d`
#' @returns the file path of the responses file for that NUTS level
german_immigration_sentiment_nuts_fp <- function(nuts_level) {
  file_name <- sprintf("nuts%d_evs_responses.csv", nuts_level)
  file.path("..", "replication_regional_data", "derived", file_name)
}

#' Aggregates German survey responses to region level, joins them to the
#' Facebook integration measures and replaces region codes with state names.
#'
#' Relies on `fb_int_nuts1`, `nuts_1_code` and `state_name` being visible from
#' where this function is defined (e.g. the global environment).
#' `fb_int_nuts1` must contain the key column `nuts1` as well as
#' `friending_integration`, `general_friending`,
#' `syrians_producing_german_content`, `relative_friending` and `pop_size`.
#'
#' @param .data respondent-level survey data containing at least the columns
#'   `cntry`, `region`, `sclmeet`, `sclact`, `anweight` and `inprdsc`
#' @returns a data frame with one row per region present in both the survey
#'   data and `fb_int_nuts1`. It holds the `anweight`-weighted mean of each
#'   survey item together with the columns of `fb_int_nuts1`; `region` holds
#'   the state name wherever the code is found in `nuts_1_code`. A region in
#'   which an item has no valid answer at all gets `NaN` for that item.
create_data_for_correlation <- function(.data) {
  data_for_corr <- .data %>%
    filter(cntry == "DE") %>%
    select(cntry, region, sclmeet, sclact, anweight, inprdsc) %>%
    mutate(
      weight = anweight,
      # values outside the specified ranges are considered missing or unreported
      sclact = ifelse(sclact %in% c(1, 2, 3, 4, 5), sclact, NA),
      sclmeet = ifelse(sclmeet %in% c(1, 2, 3, 4, 5, 6, 7), sclmeet, NA),
      inprdsc = ifelse(inprdsc %in% c(0, 1, 2, 3, 4, 5, 6), inprdsc, NA)
    ) %>%
    # NOTE(review): answer codes 5 and 6 are recoded to 8 and 10, presumably
    # approximate head counts for banded answers -- confirm against the
    # survey codebook (code 4 is left at 4).
    mutate(
      inprdsc = plyr::mapvalues(
        inprdsc,
        c(0, 1, 2, 3, 4, 5, 6),
        c(0, 1, 2, 3, 4, 8, 10)
      )
    ) %>%
    group_by(region) %>%
    summarise(
      # weighted.mean(na.rm = TRUE) drops a missing answer together with its
      # weight, so respondents without a valid answer no longer count as zeros
      # in the denominator.
      how_often_take_part_in_social_act = weighted.mean(
        sclact, weight,
        na.rm = TRUE
      ),
      how_often_meet_other = weighted.mean(sclmeet, weight, na.rm = TRUE),
      how_many_close_people_in_life = weighted.mean(
        inprdsc, weight,
        na.rm = TRUE
      )
    ) %>%
    inner_join(fb_int_nuts1, join_by(region == nuts1)) %>%
    mutate(
      region = plyr::mapvalues(region, nuts_1_code, state_name),
      # The bare column names below are assigned to themselves: values are
      # unchanged, but the call fails early if one of the columns is absent.
      friending_integration,
      general_friending,
      syrians_producing_german_content,
      relative_friending,
      how_often_take_part_in_social_act,
      how_often_meet_other,
      how_many_close_people_in_life,
      pop_size
    )

  data_for_corr
}

#' Creates a scatter plot of general friending against a survey measure and
#' saves it as a PNG in `../results_regional_analysis`.
#'
#' Points are sized by `pop_size` and labelled with `region`. The dashed
#' regression line is weighted by `pop_size`, whereas the correlation printed
#' in the top-left corner is the plain (unweighted) correlation rounded to two
#' decimals. The file is named `general_friending_vs_<y_var>_<survey>.png`.
#'
#' @param .data the data to plot; must contain the columns `general_friending`,
#'   `pop_size`, `region` and the column named by `y_var`
#' @param y_var name (string) of the column to put on the y axis
#' @param y_lab the y-axis label to use in the plot
#' @param survey the survey the data came from; only used in the file name
#' @returns the value returned by `ggsave()`
create_and_save_scatter <- function(.data, y_var, y_lab, survey) {
  cor_value <- round(cor(.data$general_friending, .data[[y_var]]), 2)

  # Keep the plot in a variable so the exact object built here is what gets
  # written to disk, rather than whatever `last_plot()` happens to hold.
  scatter_plot <- ggplot(.data, aes(x = general_friending, y = !!sym(y_var))) +
    geom_point(aes(size = pop_size)) +
    geom_smooth(
      aes(weight = pop_size),
      method = "lm",
      se = FALSE,
      color = "#003A4F",
      linetype = "dashed"
    ) +
    geom_text_repel(aes(label = region)) +
    # x = -Inf / y = Inf pins the label to the top-left corner of the panel;
    # hjust / vjust nudge it inside the plotting area.
    annotate(
      "text",
      x = -Inf,
      y = Inf,
      label = paste("Correlation = ", cor_value),
      vjust = 2,
      hjust = -0.05,
      size = 5
    ) +
    labs(
      x = "General Friending",
      y = y_lab
    ) +
    # Legend shows population in millions.
    scale_size_continuous(
      labels = scales::label_number(suffix = "M", scale = 1e-6)
    ) +
    guides(size = guide_legend(title = "Population")) +
    # NOTE(review): oi_style() is defined elsewhere; presumably the project
    # ggplot theme -- the theme() call after it overrides axis colours.
    oi_style() +
    theme(
      axis.line = element_line(color = "black"),
      axis.text = element_text(color = "black"),
      axis.title = element_text(color = "black")
    )

  plot_name <- sprintf(
    "general_friending_vs_%s_%s.png",
    y_var, survey
  )

  output_path <- file.path(
    "..",
    "results_regional_analysis",
    plot_name
  )

  ggsave(
    output_path,
    plot = scatter_plot,
    width = 10,
    height = 8,
    dpi = 300
  )
}


#' Creates and saves the general-friending scatter plots for a given survey.
#'
#' Two plots are written via `create_and_save_scatter()`: general friending
#' against `how_often_take_part_in_social_act` and against
#' `how_often_meet_other`.
#'
#' @param .data data prepared for correlation analysis with
#'   `create_data_for_correlation`
#' @param survey_name name of the survey, used for saving plots
#' @returns the value returned by the last `create_and_save_scatter()` call
create_friendliness_plots <- function(.data, survey_name) {
  # NOTE(review): set_oi_palette() is defined elsewhere; presumably it sets
  # the project colour palette before plotting -- confirm.
  set_oi_palette()

  create_and_save_scatter(
    .data,
    y_var = "how_often_take_part_in_social_act",
    y_lab = "How Often Do You Take Part in Social Activities?",
    survey = survey_name
  )

  create_and_save_scatter(
    .data,
    y_var = "how_often_meet_other",
    y_lab = "How Often Do You Meet with Others?",
    survey = survey_name
  )
}